---
title: "Replication File: How Threats of American Withdrawal from NATO Affect European Public Attitudes towards Defense"
output: html_document
date: "19/01/2026"
editor_options: 
  chunk_output_type: console
---
## Replication File: How US Hegemonic Withdrawal from NATO Affects European Public Attitudes towards Defense 
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Load Packages
```{r}
library(ggplot2)
library(ggeffects)
library(readr)
library(emmeans)
library(purrr)
library(tidyverse)
library(pwr)
library(dplyr)
library(janitor)
library(marginaleffects)
library(ggthemes)
library(modelsummary)
library(stargazer)
library(png)
library(webshot)
library(RColorBrewer)
library(interflex)
library(nnet)
library(rempsyc)
library(effectsize)
library(flextable)
library(interactions)
library(cowplot)
library(naniar)
library(ggsignif)
library(scales)
library(tableone)
library(MASS)
```

# Load data and merge
```{r}
#Load Data Set
clean_data <- read_csv("C:/Your File Path/jakobbarrett_data_for_replication.csv")

## Arrange data set for analysis 
data_consenting <- clean_data %>% 
  filter(gc_name == 1) #filter on respondents who gave consent (also deletes those who were screened out)
  

# Use suffix_num to transform to numeric columns 
data_consenting_numeric <- data_consenting %>% 
  mutate(across(ends_with("_num"), ~as.numeric(.))) %>% 
  mutate(president_num = case_when(
    president_name == "Trump" ~ 1,     
    president_name == "Harris" ~ 2,
    TRUE ~ NA                   
  ))

#Rescale DV_Spend
data_consenting_numeric$dv_spend_num_01  <- scales::rescale(data_consenting_numeric$dv_spend_num, to = c(0, 1))
```

# Check Randomization
```{r}
#Check Balance and Randomization
table(data_consenting_numeric$president_name, data_consenting_numeric$country)

country_totals <- data_consenting_numeric %>%
  mutate(country = case_when(
    country == "DE" ~ "Germany",
    country == "SE" ~ "Sweden",
    TRUE ~ country
  )) %>%
  group_by(country) %>%
  summarise(total = n(), .groups = "drop")

#Plot Treatment Assignment by Country
data_consenting_numeric %>% 
mutate(president_name = case_when(
  president_name == "control" ~ "Control", 
  TRUE ~ president_name),
  country = case_when(
  country == "DE" ~ "Germany",
  country == "SE" ~ "Sweden",
  TRUE ~ country)) %>% 
ggplot(aes(x = country, fill = president_name)) +
  geom_bar(position = "stack", color = "black") +  
  geom_text(
    stat = "count", 
    aes(label = ..count..), 
    position = position_stack(vjust = 0.5),  
    color = "black",  
    fontface = "bold",
    size = 3                                 
  ) +
  geom_text(
      data = country_totals,
      aes(x = country, y = total, label = total),
      vjust = -0.2,
      size = 4,
      fontface = "bold",
      inherit.aes = FALSE
    ) +
  labs(
    x = "Country",
    y = "Number of Respondents per Country",
    fill = "Treatment Group"
  ) +
  scale_fill_manual(values = c("Harris" = "steelblue", "Trump" = "firebrick", "Control" = "darkgrey")) +
  theme_bw() +
  theme(legend.position = "top",
        axis.title.x = element_text(size = 14, face = "bold"),  
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 10))
ggsave("treatmentdistribution_country.png", plot = last_plot(), width = 6.5, height = 4.5)

control_variables <- c(
                      "age_name",
                      "gender_num" ,
                      "education_num" , 
                      "lr_scale_num",
                      "income_num",
                      "political_interest_num" , 
                      "us_opinion_num" , 
                      "nato_opinion_num" ,
                      "economy_opinion_num" , 
                      "threat_perception_num" ,
                      "pre_treat_spend_num")
                       
balance_table <- CreateTableOne(vars = control_variables, strata = "president_name", 
                                data = data_consenting_numeric, test = TRUE)
print(balance_table)
```

# Clean Up Covariates for Later Models
These covariates will be used to test the robustness of the original models. 
```{r}
#Treat Pre-Treat Var as a factor 
data_consenting_completes <- data_consenting_numeric %>% 
mutate(pre_treat_binned = factor(case_when(
    pre_treat_spend_num == 6 ~ "Don't know",
    pre_treat_spend_num %in% c(1, 2) ~ "Too little",
    pre_treat_spend_num == 3 ~ "Just right",
    pre_treat_spend_num %in% c(4, 5) ~ "Too much"), 
    levels = c("Don't know", "Too little", "Just right", "Too much")))

table(data_consenting_completes$country, data_consenting_completes$pre_treat_binned)
class(data_consenting_completes$pre_treat_binned)

#Create a new DF, placing NA for DKs
data_consenting_completes <- data_consenting_completes %>% 
  replace_with_na_at(.vars = c("threat_perception_num","economy_opinion_num",
                               "political_interest_num", "pre_treat_spend_num",
                               "nato_opinion_num", "us_opinion_num"),
                     condition = ~.x == 6) %>% 
  replace_with_na(replace = list(lr_scale_num = 12,
                                 income_num = c(1,2)))
table(data_consenting_completes$country, data_consenting_completes$pre_treat_spend_num)
table(data_consenting_numeric$country, data_consenting_numeric$pre_treat_spend_num) #Match!

## Sort out equivalent categories for recoding purposes (Variable Key)
#Education as an example
data_consenting_numeric %>% 
  dplyr::select(education_name,education_num) %>% 
  distinct() %>% 
  arrange(education_num)
table(data_consenting_numeric$education_num)
class(data_consenting_completes$education_num)

## Make age_name numeric, because the age_num variable was coded strangely by Qualtrics
data_consenting_numeric$age_name <- as.numeric(data_consenting_numeric$age_name)
data_consenting_completes$age_name <- as.numeric(data_consenting_completes$age_name)

## Rename H2 DVs
data_consenting_completes <- data_consenting_completes %>% 
  rename(pref_Autonomy_name = pref_autonomy_name,
         pref_Autonomy_num = pref_autonomy_num)

#Rescale 
data_consenting_completes$pref_NATO_num_01  <- scales::rescale(data_consenting_completes$pref_NATO_num, to = c(0, 1))
data_consenting_completes$pref_EU_num_01 <- rescale(data_consenting_completes$pref_EU_num, to = c(0, 1))
data_consenting_completes$pref_Autonomy_num_01 <- rescale(data_consenting_completes$pref_Autonomy_num, to = c(0, 1))

### We move forward with data_consenting_completes for all analyses that include covariates, as Don't Knows have been marked NA

#Set Harris as Reference for H1 Regressions 
data_Harris_ref <- data_consenting_completes %>%
  mutate(president_name = relevel(factor(president_name), ref = "Harris"))
```

# H1 Test
```{r}
#Testing Hypothesis 1
hyp1mod <- lm(dv_spend_num_01 ~ president_name +
              pre_treat_spend_num, 
              data = data_Harris_ref)
summary(hyp1mod)

#Now with controls
hyp1mod_controls <- lm(dv_spend_num_01 ~ president_name + 
                             pre_treat_spend_num +   
                             country + 
                             age_name + 
                             as.factor(gender_num) +
                             education_num + 
                             political_interest_num + 
                             income_num +
                             lr_scale_num +
                             us_opinion_num + 
                             nato_opinion_num +
                             economy_opinion_num + 
                             threat_perception_num
                           ,
                           data = data_Harris_ref)
summary(hyp1mod_controls)

coef_list <- c(
  "president_nameTrump" = "Trump vs. Harris",
  "president_namecontrol" = "Control vs. Harris",
  "pre_treat_spend_num" = "Pre-Treatment Opinion on Defense Spending",
  "countrySE" = "Sweden",
  "countryUK" = "UK",
  "age_name" = "Age",
  "as.factor(gender_num)2" = "Gender: Female", 
  "as.factor(gender_num)3" = "Gender: Other",
  "education_num" = "Education Level",
  "political_interest_num" = "Political Interest",
  "income_num" = "Income Level",
  "lr_scale_num" = "Ideology on a L-R Scale",
  "us_opinion_num" = "Support for Close Relations with the US",
  "nato_opinion_num" = "Favorability of NATO Membership",
  "economy_opinion_num" = "Perceived State of the Economy",
  "threat_perception_num" = "Perceived Threat Posed by Russia")

h1_models <- list(
  "Main Model"  = hyp1mod, 
  "With Controls" = hyp1mod_controls)

#Combined Regression Table
modelsummary(
  h1_models,
  output = "latex",
  gof_omit = "BIC|AIC|Log.Lik.",
  coef_map = coef_list,
  stars = TRUE
)

#Combined Plot
hyp1mod_tidy <- tidy(hyp1mod, conf.int = TRUE) %>%
    mutate(model = "Main Model")
hyp1mod_controls_tidy <- tidy(hyp1mod_controls, conf.int = TRUE) %>%
    mutate(model = "With Controls")

model_shapes <- c("Main Model" = 16,
                 "With Controls" = 1)

hyp1_plot_data <- bind_rows(hyp1mod_tidy, hyp1mod_controls_tidy)
hyp1_plot_data <- hyp1_plot_data %>% 
  mutate(
    esthigh = estimate + (1.96 * std.error),  # 95% CI upper bound 
    estlow = estimate - (1.96 * std.error),  
    ci90high = estimate + (1.64 * std.error), 
    ci90low = estimate - (1.64 * std.error),  
    term = case_when(
      term == "president_nameTrump" ~ "Trump vs. Harris Treatment")) %>% 
  filter(term != "(Intercept)")

hyp1_main_plot <- ggplot(hyp1_plot_data, aes(x = model, y = estimate)) +
  geom_point(aes(color = term, shape = model), size = 3, position = position_dodge(width = 0.5)) +  
  geom_linerange(
  aes(ymin = estlow, ymax = esthigh, color = term), 
  linewidth = 0.2,
  position = position_dodge(width = 0.5)
      ) +
  geom_linerange(
  aes(ymin = ci90low, ymax = ci90high, color = term), 
  linewidth = 1,
  position = position_dodge(width = 0.5)
    ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  geom_text(
  aes(
    label = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01  ~ "**",
      p.value < 0.05  ~ "*",
      TRUE            ~ ""),
    group = model),
  position = position_dodge(width = 0.5),
  size = 4.5,
  hjust = -0.75,
  color = "black")+
  labs(
    x = "Comparison: Trump vs. Harris Treatment",
    y = "Effect on Willingness to Spend",
    caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05"
  ) +
  scale_color_manual(values = c("Trump vs. Harris Treatment" = "purple4")) +
  scale_shape_manual(values = model_shapes) +
  theme(axis.title.x = element_text(size = 14, face = "bold"),  
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "none")

# Explicitly print the plot
print(hyp1_main_plot)
ggsave("hyp1_main_plot.png", plot = hyp1_main_plot, width = 6, height = 5)
```

# Country-Level H1 Test
```{r}
#Harris = Ref, Sub Group Models
#Create Items Before Loop
subsamples <- c("DE", "SE", "UK")
hyp1_model1_list <- list()
hyp1_model2_list <- list()

datasets <- list(
  Harrisref = data_Harris_ref,
  Controlref = data_consenting_completes
)

#Loop
for (dataset_name in names(datasets)) {
for (subsample in subsamples) {
  # Filter each dataset for the country subsample
    filtered_data <- datasets[[dataset_name]] %>%
      filter(country == subsample)
    
    # Fit OLS model with dynamic formulas
    formula_1 <- as.formula(dv_spend_num_01 ~ president_name +
                    pre_treat_spend_num)
    #With Controlsc
    formula_2 <- as.formula(dv_spend_num_01 ~ president_name +
                    age_name + as.factor(gender_num) +
                    education_num + income_num + 
                    political_interest_num + lr_scale_num +
                    us_opinion_num + nato_opinion_num +
                    economy_opinion_num + threat_perception_num +
                    pre_treat_spend_num)
    #Two Models
    model_1 <- lm(formula_1, data = filtered_data)
    model_2 <- lm(formula_2, data = filtered_data)
    
    # Create model names 
    model_name1 <- paste("hyp1", "model1", tolower(dataset_name), tolower(subsample), sep = "_")
    model_name2 <- paste("hyp1", "model2", tolower(dataset_name), tolower(subsample), sep = "_")
    
    hyp1_model1_list[[model_name1]] <- model_1
    hyp1_model2_list[[model_name2]] <- model_2
}
}

#Regression Table
names(hyp1_model1_list)
names(hyp1_model2_list)

hyp1_countries <- list(
  "Main Model - Germany" = hyp1_model1_list[["hyp1_model1_harrisref_de"]],
  "Main Model - Sweden" = hyp1_model1_list[["hyp1_model1_harrisref_se"]],
  "Main Model - UK" = hyp1_model1_list[["hyp1_model1_harrisref_uk"]]
)

hyp1_countries_controls <- list(
"Germany With Controls" = hyp1_model2_list[["hyp1_model2_harrisref_de"]],
"Sweden With Controls" = hyp1_model2_list[["hyp1_model2_harrisref_se"]],
"UK With Controls" = hyp1_model2_list[["hyp1_model2_harrisref_uk"]])

modelsummary(hyp1_countries, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = T) 
modelsummary(hyp1_countries_controls, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = T) 

coef_list_controlref <- c(
  "president_nameTrump" = "Trump vs. Control",
  "president_nameHarris" = "Harris vs. Control",
  "pre_treat_spend_num" = "Pre-Treatment Opinion on Defense Spending",
  "countrySE" = "Sweden",
  "countryUK" = "UK",
  "age_name" = "Age",
  "as.factor(gender_num)2" = "Gender: Female", 
  "as.factor(gender_num)3" = "Gender: Other",
  "education_num" = "Education Level",
  "political_interest_num" = "Political Interest",
  "income_num" = "Income Level",
  "lr_scale_num" = "Ideology on a L-R Scale",
  "us_opinion_num" = "Support for Close Relations with the US",
  "nato_opinion_num" = "Favorability of NATO Membership",
  "economy_opinion_num" = "Perceived State of the Economy",
  "threat_perception_num" = "Perceived Threat Posed by Russia")

hyp1_countries_controlref <- list(
  "Germany" = hyp1_model1_list[["hyp1_model1_controlref_de"]],
  "Sweden" = hyp1_model1_list[["hyp1_model1_controlref_se"]],
  "UK" = hyp1_model1_list[["hyp1_model1_controlref_uk"]]
)
hyp1_countries_controls_ref <- list(
  "Germany With Controls" = hyp1_model2_list[["hyp1_model2_controlref_de"]],
  "Sweden With Controls" = hyp1_model2_list[["hyp1_model2_controlref_se"]],
  "UK With Controls" = hyp1_model2_list[["hyp1_model2_controlref_uk"]]
)
modelsummary(hyp1_countries_controlref, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list_controlref, stars = T)
modelsummary(hyp1_countries_controls_ref, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list_controlref, stars = T)

# Extract tidy data for all models in the lists, adding model type and country info
extract_tidy_models <- function(model_list, model_label) {
  bind_rows(
    lapply(names(model_list), function(mod_name) {
      tidy_df <- tidy(model_list[[mod_name]], conf.int = TRUE)
      
      # Split by underscore
      parts <- strsplit(mod_name, "_")[[1]]
      
      # Extract dataset name (3rd part)
      dataset_name <- parts[3]
      
      # Extract country (last part)
      country <- substr(mod_name, nchar(mod_name) - 1, nchar(mod_name))
      
      tidy_df %>% 
        mutate(model = model_label, dataset = dataset_name, country = country, model_name = mod_name)
    })
  )
}

tidy_mod1 <- extract_tidy_models(hyp1_model1_list, "Main Model")
tidy_mod2 <- extract_tidy_models(hyp1_model2_list, "With Controls")

# 2. Combine all tidy data into one data frame
all_tidy_models <- bind_rows(tidy_mod1, tidy_mod2)

# 3. Calculate confidence bounds and recode terms
hyp1_countryplot_data <- all_tidy_models %>%
  mutate(term = case_when(
    dataset == "harrisref" & term == "president_nameTrump" ~ "Trump vs. Harris",
    dataset == "controlref" & term == "president_nameTrump" ~ "Trump vs. Control",
    dataset == "controlref" & term == "president_nameHarris" ~ "Harris vs. Control"),
    esthigh = estimate + (1.96 * std.error),  # 95% CI upper
    estlow = estimate - (1.96 * std.error),
    ci90high = estimate + (1.64 * std.error), # 90% CI upper
    ci90low = estimate - (1.64 * std.error),
      country = case_when(
      country == "de" ~ "Germany",
      country == "se" ~ "Sweden",
      country == "uk" ~ "UK",
      TRUE ~ country)) %>%
  filter(term != "(Intercept)")

# 5. Plot with ggplot: x by model, facet by country to show differences
model_shapes <- c("Main Model" = 16,
                 "With Controls" = 1)

hyp1_countryplot <- ggplot(hyp1_countryplot_data, aes(x = model, y = estimate)) +
  geom_point(aes(color = term, shape = model), size = 3, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = estlow, ymax = esthigh, color = term),
                 linewidth = 0.2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = ci90low, ymax = ci90high, color = term),
                 linewidth = 1, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_text(aes(
            label = case_when(
            p.value < 0.001 ~ "***",
            p.value < 0.01 ~ "**",
            p.value < 0.05 ~ "*",
            p.value < 0.10 ~ "!",
            TRUE ~ ""),
            group = interaction(model, term)),
            position = position_dodge(width = 0.5),
            size = 4,
            vjust = -0.35,
            hjust = -0.65,
            color = "black") +
  facet_wrap(~country) +
  scale_color_manual(values = c("Trump vs. Control" = "firebrick", 
                                "Harris vs. Control" = "steelblue", 
                                "Trump vs. Harris" = "purple4")) + 
  scale_shape_manual(values = model_shapes) +
  labs(
    x = "Model Specification",
    y = "Effect on Willingness to Spend",
    caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05, ! p < 0.10"
  ) +
  theme_bw() +
  guides(shape = "none") + 
  coord_flip()+
  theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "top",
        legend.title = element_blank())
print(hyp1_countryplot)
ggsave("hyp1_countryplot.png", plot = hyp1_countryplot, width = 8, height = 5)

##########Plot just Trump vs. Harris Comparison
hyp1_countryplot_data %>% 
  filter(term == "Trump vs. Harris") %>% 
  ggplot(aes(x = model, y = estimate)) +
  geom_point(aes(color = term, shape = model), size = 3, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = estlow, ymax = esthigh, color = term),
                 linewidth = 0.2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = ci90low, ymax = ci90high, color = term),
                 linewidth = 1, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_text(aes(
            label = case_when(
            p.value < 0.001 ~ "***",
            p.value < 0.01 ~ "**",
            p.value < 0.05 ~ "*",
            p.value < 0.10 ~ "!",
            TRUE ~ ""),
            group = interaction(model, term)),
            position = position_dodge(width = 0.5),
            size = 4,
            vjust = -0.35,
            hjust = -0.65,
            color = "black") +
  facet_wrap(~country) +
  scale_color_manual(values = c("Trump vs. Harris" = "purple4")) + 
  scale_shape_manual(values = model_shapes) +
  labs(
    x = "Model Specification",
    y = "Effect on Willingness to Spend",
    caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05, ! p < 0.10"
  ) +
  theme_bw() +
  guides(shape = "none") + 
  theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "none")
ggsave("hyp1_countryplot_trumpvsharris.png", plot = last_plot(), height = 5, width = 8)
```

# Marginal Means for Willingness to Spend
```{r}
hyp1_marginal_means_list <- list()

# Loop over main models only (hyp1_model1_list)
for (model_name in names(hyp1_model1_list)) {
  model <- hyp1_model1_list[[model_name]]
  
  # Get emmeans for 'president_name' (the key focal factor) with 'control' as reference
  emm_h1 <- emmeans(model, specs = "president_name", type = "response")
  
  # Convert emmeans object to data frame
  emm_h1_df <- as.data.frame(emm_h1)
  
  # Add identifiers from model name
  parts <- strsplit(model_name, "_")[[1]]
  dataset_name <- parts[3]
  country <- substr(model_name, nchar(model_name) - 1, nchar(model_name))
  
  emm_h1_df <- emm_h1_df %>%
    mutate(model_name = model_name,
           dataset = dataset_name,
           country = country)
  
  # Store in list by model name
  hyp1_marginal_means_list[[model_name]] <- emm_h1_df
}

# Combine and clean in 1 data frame
h1_marginal_means <- bind_rows(hyp1_marginal_means_list) %>% 
  filter(dataset != "harrisref")

h1_marginal_means <- h1_marginal_means %>%
  mutate(
    country = case_when(
      country == "de" ~ "Germany",
      country == "se" ~ "Sweden",
      country == "uk" ~ "UK",
      TRUE ~ country
    ),
    president_name = case_when(
      president_name == "control" ~ "Control",
      TRUE ~ president_name),
    ci95low = emmean - (SE * 1.96),  # 95% CI
    ci95high = emmean + (SE * 1.96),
    ci90low = emmean - (SE * 1.64),  # 90% CI
    ci90high = emmean + (SE * 1.64))

h1_marginal_means_plot <- ggplot(h1_marginal_means, aes(x = president_name, y = emmean, color = president_name)) +
  geom_point(size = 3, position = position_dodge(0.5)) +  # Estimated means
  geom_linerange(aes(ymin = ci95low, ymax = ci95high), linewidth = 0.3, 
                 position = position_dodge(0.5)) +  # Thin 95% CI
  geom_linerange(aes(ymin = ci90low, ymax = ci90high), linewidth = 1, 
                 position = position_dodge(0.5)) +  # Thick 90% CI
  labs(x = "President Treatment",
       y = "Estimated Marginal Mean: Willingness to Spend",
       color = "President Treatment") +
  scale_color_manual(values = c("Harris" = "steelblue", "Trump" = "firebrick", "Control" = "darkgrey")) +
  facet_wrap(~ country, ncol = 3) + 
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_text(size = 14, face = "bold"),  
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 10)) 
print(h1_marginal_means_plot)
ggsave("h1_marginal_means_plot.png", plot = h1_marginal_means_plot, width = 6, height = 5)
```

# H2 Test
```{r}
#Testing Hypothesis 2
dvs <- c("pref_NATO_num_01", "pref_EU_num_01", "pref_Autonomy_num_01")

# Create an empty list to store models 
hyp2_model1_list <- list()
hyp2_model2_list <- list()

datasets <- list(
  Harrisref = data_Harris_ref,
  Controlref = data_consenting_completes
)

#Loop
for (dataset_name in names(datasets)) {
for (dv in dvs) {
    # Fit OLS model with dynamic formula
    formula_1 <- as.formula(paste(dv, "~ president_name"))
    #With Controls
    formula_2 <- as.formula(paste(dv, "~ president_name +
                    age_name + as.factor(gender_num) +
                    education_num + income_num + 
                    political_interest_num + lr_scale_num +
                    us_opinion_num +
                    economy_opinion_num + threat_perception_num"))
    
    # Use actual dataset instead of name
    dataset <- datasets[[dataset_name]]
    
    model_1 <- lm(formula_1, data = dataset)
    model_2 <- lm(formula_2, data = dataset)
    
    # Create model names
    model_name1 <- paste("hyp2", "model1", tolower(dataset_name), dv, sep = "_")
    model_name2 <- paste("hyp2", "model2", tolower(dataset_name), dv, sep = "_")
    
    hyp2_model1_list[[model_name1]] <- model_1
    hyp2_model2_list[[model_name2]] <- model_2
  }
}

#Regression Table
names(hyp2_model1_list)
names(hyp2_model2_list)

hyp2_list <- list(
  "Main Model NATO" = hyp2_model1_list[["hyp2_model1_harrisref_pref_NATO_num_01"]],
  "NATO with Controls" = hyp2_model2_list[["hyp2_model2_harrisref_pref_NATO_num_01"]],
  "Main Model EU" = hyp2_model1_list[["hyp2_model1_harrisref_pref_EU_num_01"]],
  "EU with Controls" = hyp2_model2_list[["hyp2_model2_harrisref_pref_EU_num_01"]],
  "Main Model Autonomy" = hyp2_model1_list[["hyp2_model1_harrisref_pref_Autonomy_num_01"]],
  "Autonomy with Controls" = hyp2_model2_list[["hyp2_model2_harrisref_pref_Autonomy_num_01"]]
)

modelsummary(
  hyp2_list,
  output = "latex",
  gof_omit = "BIC|AIC|Log.Lik.",
  coef_map = coef_list,
  stars = TRUE
)


hyp2_controlref <- list(
  "NATO" = hyp2_model1_list[["hyp2_model1_controlref_pref_NATO_num_01"]],
  "NATO with Controls" = hyp2_model2_list[["hyp2_model2_controlref_pref_NATO_num_01"]],
  "EU" = hyp2_model1_list[["hyp2_model1_controlref_pref_EU_num_01"]],
  "EU with Controls" = hyp2_model2_list[["hyp2_model2_controlref_pref_EU_num_01"]],
  "Autonomy" = hyp2_model1_list[["hyp2_model1_controlref_pref_Autonomy_num_01"]],
  "Autonomy with Controls" = hyp2_model2_list[["hyp2_model2_controlref_pref_Autonomy_num_01"]]
)

modelsummary(hyp2_controlref, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list_controlref, stars = TRUE)


#Extract
extract_tidy_dv <- function(model_list, model_label) {
  bind_rows(
    lapply(names(model_list), function(mod_name) {
      tidy_df <- tidy(model_list[[mod_name]], conf.int = TRUE)
      
      parts <- strsplit(mod_name, "_")[[1]]
      dataset_name <- parts[3]     # 3rd part is dataset
      dv <- paste(parts[4:length(parts)], collapse = "_")  # remaining parts form the dv
      
     # Simplify dv string, extract "institution" only
      dv_simple <- str_extract(dv, "(?<=pref_)[^_]+(?=_num)")
      
      tidy_df %>% 
        mutate(model = model_label,
               dataset = dataset_name,
               dv = dv_simple,
               model_name = mod_name)
    })
  )
}

# Tidy Models
tidy_hyp2_mod1 <- extract_tidy_dv(hyp2_model1_list, "Main Model")
tidy_hyp2_mod2 <- extract_tidy_dv(hyp2_model2_list, "With Controls")

# 2. Combine all tidy data into one data frame
tidy_hyp2_models <- bind_rows(tidy_hyp2_mod1, tidy_hyp2_mod2)

# 3. Calculate confidence bounds and recode terms
hyp2_plot_data <- tidy_hyp2_models %>%
  mutate(term = case_when(
    dataset == "harrisref" & term == "president_nameTrump" ~ "Trump vs. Harris",
    dataset == "controlref" & term == "president_nameTrump" ~ "Trump vs. Control",
    dataset == "controlref" & term == "president_nameHarris" ~ "Harris vs. Control"),
    esthigh = estimate + (1.96 * std.error),  # 95% CI upper
    estlow = estimate - (1.96 * std.error),
    ci90high = estimate + (1.64 * std.error), # 90% CI upper
    ci90low = estimate - (1.64 * std.error)) %>%
  filter(term != "(Intercept)")

#Plot
model_shapes <- c("Main Model" = 16,
                 "With Controls" = 1)

hyp2_plot <- ggplot(hyp2_plot_data, aes(x = model, y = estimate)) +
  geom_point(aes(color = term, shape = term), size = 3, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = estlow, ymax = esthigh, color = term),
                 linewidth = 0.2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = ci90low, ymax = ci90high, color = term),
                 linewidth = 1, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_text(aes(
            label = case_when(
            p.value < 0.001 ~ "***",
            p.value < 0.01 ~ "**",
            p.value < 0.05 ~ "*",
            p.value < 0.10 ~ "!",
            TRUE ~ ""),
            group = interaction(model, term)),
            position = position_dodge(width = 0.5),
            size = 4,
            vjust = -0.35,
            hjust = -0.75,
            color = "black") +
  facet_wrap(~dv) +
  scale_color_manual(values = c("Trump vs. Control" = "firebrick", 
                                "Harris vs. Control" = "steelblue", 
                                "Trump vs. Harris" = "purple4")) + 
  #scale_shape_manual(values = model_shapes) +
  labs(
    x = "Model Specification",
    y = "Effect on Institutional Preferences",
    caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05, ! p < 0.10"
  ) +
  theme_bw() +
  #guides(shape = "none") + 
  coord_flip()+
  theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "top",
        legend.title = element_blank())
print(hyp2_plot)
ggsave("hyp2_main_plot.png", plot = hyp2_plot, width = 7.5, height = 5)
```

#Country-Level H2 Test
```{r}
# Define country subsamples
subsamples <- c("DE", "SE", "UK")

# Initialize empty lists to store models
hyp2_model1_countries_list <- list()
hyp2_model2_countries_list <- list()

datasets <- list(
  Harrisref = data_Harris_ref,
  Controlref = data_consenting_completes
)

for (dataset_name in names(datasets)) {
  for (subsample in subsamples) {
    # Filter dataset by country subsample
    filtered_data <- datasets[[dataset_name]] %>% filter(country == subsample)
    
    for (dv in dvs) {
      # OLS dynamic formulas
      formula_1 <- as.formula(paste(dv, "~ president_name"))
      formula_2 <- as.formula(paste(dv, "~ president_name + age_name +
                                    as.factor(gender_num) + 
                                    education_num + income_num +
                                    political_interest_num + lr_scale_num + 
                                    us_opinion_num +
                                    economy_opinion_num + 
                                    threat_perception_num"))
      
      # Fit models 
      model_1 <- lm(formula_1, data = filtered_data)
      model_2 <- lm(formula_2, data = filtered_data)
      
      # Create model names including dataset, country and dv info
      model_name1 <- paste("hyp2", "model1", tolower(dataset_name), tolower(subsample), dv, sep = "_")
      model_name2 <- paste("hyp2", "model2", tolower(dataset_name), tolower(subsample), dv, sep = "_")
      
      # Save models
      hyp2_model1_countries_list[[model_name1]] <- model_1
      hyp2_model2_countries_list[[model_name2]] <- model_2
    }
  }
}

#Regression Table
names(hyp2_model1_countries_list)

hyp2_de_list <- list(
  "NATO" = hyp2_model1_countries_list[["hyp2_model1_harrisref_de_pref_NATO_num_01"]],
  "NATO with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_de_pref_NATO_num_01"]],
  "EU" = hyp2_model1_countries_list[["hyp2_model1_harrisref_de_pref_EU_num_01"]],
  "EU with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_de_pref_EU_num_01"]],
  "Autonomy" = hyp2_model1_countries_list[["hyp2_model1_harrisref_de_pref_Autonomy_num_01"]],
  "Autonomy with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_de_pref_Autonomy_num_01"]])

modelsummary(hyp2_de_list, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = TRUE)

hyp2_se_list <- list(
  "NATO" = hyp2_model1_countries_list[["hyp2_model1_harrisref_se_pref_NATO_num_01"]],
  "NATO with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_se_pref_NATO_num_01"]],
  "EU" = hyp2_model1_countries_list[["hyp2_model1_harrisref_se_pref_EU_num_01"]],
  "EU with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_se_pref_EU_num_01"]],
  "Autonomy" = hyp2_model1_countries_list[["hyp2_model1_harrisref_se_pref_Autonomy_num_01"]],
  "Autonomy with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_se_pref_Autonomy_num_01"]]
  )
modelsummary(hyp2_se_list, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = TRUE)

hyp2_uk_list <- list(
  "NATO" = hyp2_model1_countries_list[["hyp2_model1_harrisref_uk_pref_NATO_num_01"]],
  "NATO with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_uk_pref_NATO_num_01"]],
  "EU" = hyp2_model1_countries_list[["hyp2_model1_harrisref_uk_pref_EU_num_01"]],
  "EU with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_uk_pref_EU_num_01"]],
  "Autonomy" = hyp2_model1_countries_list[["hyp2_model1_harrisref_uk_pref_Autonomy_num_01"]],
  "Autonomy with Controls" = hyp2_model2_countries_list[["hyp2_model2_harrisref_uk_pref_Autonomy_num_01"]])
modelsummary(hyp2_uk_list, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = TRUE)

hyp2_country_control_ref <- list(
  "Germany - NATO" = hyp2_model1_countries_list[["hyp2_model1_controlref_de_pref_NATO_num_01"]],
  "Germany - EU" = hyp2_model1_countries_list[["hyp2_model1_controlref_de_pref_EU_num_01"]],
  "Germany - Autonomy" = hyp2_model1_countries_list[["hyp2_model1_controlref_de_pref_Autonomy_num_01"]],
  "Sweden - NATO" = hyp2_model1_countries_list[["hyp2_model1_controlref_se_pref_NATO_num_01"]],
  "Sweden - EU" = hyp2_model1_countries_list[["hyp2_model1_controlref_se_pref_EU_num_01"]],
  "Sweden - Autonomy" = hyp2_model1_countries_list[["hyp2_model1_controlref_se_pref_Autonomy_num_01"]],
  "UK - NATO" = hyp2_model1_countries_list[["hyp2_model1_controlref_uk_pref_NATO_num_01"]],
  "UK - EU" = hyp2_model1_countries_list[["hyp2_model1_controlref_uk_pref_EU_num_01"]],
  "UK - Autonomy" = hyp2_model1_countries_list[["hyp2_model1_controlref_uk_pref_Autonomy_num_01"]]
)

modelsummary(hyp2_country_control_ref, output = "latex", gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list_controlref, stars = TRUE)


#Extract Tidy Models
extract_country_institutions <- function(model_list, model_label) {
  bind_rows(
    lapply(names(model_list), function(mod_name) {
      tidy_df <- tidy(model_list[[mod_name]], conf.int = TRUE)
      
      parts <- strsplit(mod_name, "_")[[1]]
      
      dataset_name <- parts[3]                
      country <- parts[4]                
      full_dv <- paste(parts[5:length(parts)], collapse = "_")  
      
      # Extract institution
      dv_simple <- str_extract(full_dv, "(?<=pref_)[^_]+(?=_num)")
      
      tidy_df %>% 
        mutate(model = model_label,
               dataset = dataset_name,
               country = country,
               dv = dv_simple,
               model_name = mod_name)
    })
  )
}

# Tidy Models
tidy_hyp2_countries_mod1 <- extract_country_institutions(hyp2_model1_countries_list, "Main Model")
tidy_hyp2_countries_mod2 <- extract_country_institutions(hyp2_model2_countries_list, "With Controls")

# Work only with Main Model
hyp2_countryplot_data <- tidy_hyp2_countries_mod1 %>%
  mutate(term = case_when(
    dataset == "harrisref" & term == "president_nameTrump" ~ "Trump vs. Harris",
    dataset == "controlref" & term == "president_nameTrump" ~ "Trump vs. Control",
    dataset == "controlref" & term == "president_nameHarris" ~ "Harris vs. Control"),
    esthigh = estimate + (1.96 * std.error),  # 95% CI upper
    estlow = estimate - (1.96 * std.error),
    ci90high = estimate + (1.64 * std.error), 
    ci90low = estimate - (1.64 * std.error),
      country = case_when(
      country == "de" ~ "Germany",
      country == "se" ~ "Sweden",
      country == "uk" ~ "UK",
      TRUE ~ country)) %>%
  filter(term != "(Intercept)")

#Plot
hyp2_countryplot <- ggplot(hyp2_countryplot_data, aes(x = term, y = estimate)) +
  geom_point(aes(color = term, shape = term), size = 2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = estlow, ymax = esthigh, color = term),
                 linewidth = 0.2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = ci90low, ymax = ci90high, color = term),
                 linewidth = 1, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_text(aes(
            label = case_when(
            p.value < 0.001 ~ "***",
            p.value < 0.01 ~ "**",
            p.value < 0.05 ~ "*",
            p.value < 0.10 ~ "!",
            TRUE ~ ""),
            group = interaction(model, term)),
            position = position_dodge(width = 0.5),
            size = 4,
            vjust = -0.35,
            hjust = -0.65,
            color = "black") +
  facet_grid(cols = vars(country), rows = vars(dv)) +
  scale_color_manual(values = c("Trump vs. Control" = "firebrick", 
                                "Harris vs. Control" = "steelblue", 
                                "Trump vs. Harris" = "purple4")) + 
  labs(
    x = "Treatment Group Comparison",
    y = "Effect on Institutional Preferences",
    caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05, ! p < 0.10"
  ) +
  theme_bw() +
  guides(shape = "none") + 
  coord_flip()+
  theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "none")
print(hyp2_countryplot)
ggsave("hyp2_countryplot.png", plot = hyp2_countryplot, width = 7.5, height = 6)

#Separate Plot for Models with Controls 
hyp2_countryplotcontrols_data <- tidy_hyp2_countries_mod2 %>%
  mutate(term = case_when(
    dataset == "harrisref" & term == "president_nameTrump" ~ "Trump vs. Harris",
    dataset == "controlref" & term == "president_nameTrump" ~ "Trump vs. Control",
    dataset == "controlref" & term == "president_nameHarris" ~ "Harris vs. Control"),
    esthigh = estimate + (1.96 * std.error),  # 95% CI upper
    estlow = estimate - (1.96 * std.error),
    ci90high = estimate + (1.64 * std.error), 
    ci90low = estimate - (1.64 * std.error),
      country = case_when(
      country == "de" ~ "Germany",
      country == "se" ~ "Sweden",
      country == "uk" ~ "UK",
      TRUE ~ country)) %>%
  filter(term != "(Intercept)")

#Plot
hyp2_countryplot_controls <- ggplot(hyp2_countryplotcontrols_data, aes(x = term, y = estimate)) +
  geom_point(aes(color = term, shape = term), size = 2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = estlow, ymax = esthigh, color = term),
                 linewidth = 0.2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = ci90low, ymax = ci90high, color = term),
                 linewidth = 1, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_text(aes(
            label = case_when(
            p.value < 0.001 ~ "***",
            p.value < 0.01 ~ "**",
            p.value < 0.05 ~ "*",
            p.value < 0.10 ~ "!",
            TRUE ~ ""),
            group = interaction(model, term)),
            position = position_dodge(width = 0.5),
            size = 4,
            vjust = -0.35,
            hjust = -0.65,
            color = "black") +
  facet_grid(cols = vars(country), rows = vars(dv)) +
  scale_color_manual(values = c("Trump vs. Control" = "firebrick", 
                                "Harris vs. Control" = "steelblue", 
                                "Trump vs. Harris" = "purple4")) +
  scale_shape_manual(values = c("Trump vs. Control" = 2, 
                                "Harris vs. Control" = 1, 
                                "Trump vs. Harris" = 0)) +
  labs(
    x = "Treatment Group Comparison",
    y = "Effect on Institutional Preferences",
    caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05, ! p < 0.10"
  ) +
  theme_bw() +
  coord_flip()+
  theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "none")
print(hyp2_countryplot_controls)
ggsave("hyp2_countryplot_controls.png", plot = hyp2_countryplot_controls, width = 7.5, height = 6)
```

#Marginal Means for Institutional Preferences
```{r}
# Marginal Means 
dvs <- c("pref_NATO_num_01", "pref_EU_num_01", "pref_Autonomy_num_01")
subsamples <- c("DE", "SE", "UK")

hyp2_mmeans_list <- list()

for (subsample in subsamples) {
  for (dv in dvs) {
    formula <- as.formula(paste(dv, "~ president_name +
                      pre_treat_spend_num"))
    model <- lm(formula, data = data_consenting_completes, subset = (country == subsample), model = TRUE,
                x = TRUE, y = TRUE)
    
    model_name <- paste("model", dv, subsample, sep = "_")
    hyp2_mmeans_list[[model_name]] <- model
  }
}
# Step 1: Calculate Marginal Means for each model
marginal_means_results <- imap_dfr(hyp2_mmeans_list, ~ {
  # Extract country code from model name to subset data
  country_code <- str_extract(.y, "DE|SE|UK")
  data_sub <- data_consenting_completes %>% filter(country == country_code)
  emmeans(.x, ~ president_name, data = data_sub, type = "response", conf.int = TRUE) %>%
    as.data.frame() %>%
    mutate(model = .y)
})

# Step 2: Extract country and DV information from model names
marginal_means_results <- marginal_means_results %>%
  mutate(country = str_sub(model, start = -2, end = -1),  # Extract last two characters (country)
         dv = str_extract(model, "(?<=model_pref_)[^_]+(?=_num)")) %>%  # Extract DV from model name
  mutate(
    president_name = case_when(
      president_name == "control" ~ "Control",
      TRUE ~ president_name),  # Keep other terms as they are
    country = case_when(
      country == "DE" ~ "Germany",
      country == "SE" ~ "Sweden",
      TRUE ~ country),
    ci95low = emmean - (SE * 1.96),  # 95% CI
    ci95high = emmean + (SE * 1.96),
    ci90low = emmean - (SE * 1.64),  # 90% CI
    ci90high = emmean + (SE * 1.64))

View(marginal_means_results)

# Step 3: Filter results for specific terms (e.g., "Trump" and "Harris" for president_name)
h2_marginalmeans_plot <- marginal_means_results %>%
  ggplot(aes(x = emmean, y = president_name, color = president_name)) +
  geom_point(size = 2) +  
  geom_linerange(aes(xmin = ci95low, xmax = ci95high), linewidth = 0.3) +  # Thin 95% CI
  geom_linerange(aes(xmin = ci90low, xmax = ci90high), linewidth = 1) +  # Thick 90% CI
  facet_grid(cols = vars(country), rows = vars(dv)) +  
  labs(
    title = "Marginal Means of Institutional Preferences Across Countries",
    x = "Support for Defence Frameworks on a 0-1 Scale",
    y = "Treatment Condition"
  ) +
  scale_color_manual(values = c("Harris" = "steelblue", "Trump" = "firebrick", "Control" = "grey40")) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_text(size = 14, face = "bold"),  
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 10)) 
print(h2_marginalmeans_plot)
ggsave("h2_marginalmeans.png",plot = h2_marginalmeans_plot, width = 7.5, height = 6)
```

# Confidence in NATO Allies as a Mechanism
```{r}
#Confidence in NATO allies is measured post treatment and can be considered a potential mediator in X --> Y
#Ensure Confidence in Nato is Rescaled
data_consenting_completes$confidence_nato_num_01  <- scales::rescale(data_consenting_completes$confidence_nato_num, to = c(0, 1))
data_Harris_ref$confidence_nato_num_01  <- scales::rescale(data_Harris_ref$confidence_nato_num, to = c(0, 1))

test_mod <- lm(confidence_nato_num_01 ~ president_name, 
               data = data_Harris_ref)
summary(test_mod)
#There is a significant, negative effect on confidence in NATO for those in the Trump condition 

test_mod_control <- lm(confidence_nato_num_01 ~ president_name + ally_name, 
               data = data_Harris_ref)
summary(test_mod_control)
#The effect is consistent whether or not we control for the 2nd vignette treatment

################################################################################## Store all Autonomy models in a list
mod_list_autonomy <- list()
mod_list_autonomy_c <- list() #control for 2nd treatment 

# Model 0a (mediator model, no covariates)
mod_list_autonomy[["mod0"]] <- lm(confidence_nato_num_01 ~ president_name, data = data_consenting_completes) #We want to explore why Trump had a positive effect on autonomy relative to the control (H2)
mod_list_autonomy_c[["mod0c"]] <- lm(confidence_nato_num_01 ~ president_name + ally_name, data = data_consenting_completes) #We only include a control for Vignette 2 in the Mediator model, because V2 can affect the Mediator

# Models 1a–4a and 1b-4b (outcomes, no covariates)
mod_list_autonomy[["mod1a"]] <- lm(pref_Autonomy_num_01 ~ president_name, data = data_consenting_completes)
mod_list_autonomy[["mod1b"]] <- lm(pref_Autonomy_num_01 ~ president_name + confidence_nato_num_01, data = data_consenting_completes)

mod_list_autonomy_c[["mod1ac"]] <- lm(pref_Autonomy_num_01 ~ president_name, data = data_consenting_completes)
mod_list_autonomy_c[["mod1bc"]] <- lm(pref_Autonomy_num_01 ~ president_name + confidence_nato_num_01, data = data_consenting_completes)


# Model names in the list:
mod_list_autonomy_names <- c("Trump Effect on Confidence in NATO", 
                 "Direct Trump Effect on Autonomy Preference", 
                 "Mediator Effect on Autonomy Preference")


names(mod_list_autonomy) <- mod_list_autonomy_names
names(mod_list_autonomy_c) <- mod_list_autonomy_names

#Get 4 regression tables
# Custom labels for coefficients
coef_labels_autonomy <- c(
  "president_nameTrump" = "Trump vs. Control",
  "president_nameHarris" = "Harris vs. Control",
  "confidence_nato_num_01" = "Confidence in NATO Allies",
  "ally_nameoutgroup" = "2nd Vignette Treatment",      
  "pre_treat_spend_num" = "Pre-Treatment Opinion on Defense Spending"
)

modelsummary(mod_list_autonomy, output = "latex", coef_map = coef_labels_autonomy, gof_omit = "BIC|AIC|Log.Lik.", stars = TRUE)
modelsummary(mod_list_autonomy_c, output = "latex", coef_map = coef_labels_autonomy, gof_omit = "BIC|AIC|Log.Lik.", stars = TRUE)

############################################################ EU Preferences
# Store all models in a list
mod_list_EU <- list()
mod_list_EU_c <- list() #control for 2nd treatment

# Model 0 (mediator model, no covariates)
mod_list_EU[["mod0"]] <- lm(confidence_nato_num_01 ~ president_name, data = data_Harris_ref)
mod_list_EU_c[["mod0c"]] <- lm(confidence_nato_num_01 ~ president_name + ally_name, data = data_Harris_ref)

# Models 1a–4a and 1b-4b (outcomes, no covariates)
mod_list_EU[["mod2a"]] <- lm(pref_EU_num_01 ~ president_name, data = data_Harris_ref)
mod_list_EU[["mod2b"]] <- lm(pref_EU_num_01 ~ president_name + confidence_nato_num_01, data = data_Harris_ref)

mod_list_EU_c[["mod2ac"]] <- lm(pref_EU_num_01 ~ president_name, data = data_Harris_ref)
mod_list_EU_c[["mod2bc"]] <- lm(pref_EU_num_01 ~ president_name + confidence_nato_num_01, data = data_Harris_ref)

# Model names in the list: 
mod_list_EU_names <- c("Trump Effect on Confidence in NATO", 
                 "Direct Trump Effect on EU Preference", 
                 "Mediator Effect on EU Preference")

names(mod_list_EU) <- mod_list_EU_names
names(mod_list_EU_c) <- mod_list_EU_names

coef_labels_EU <- c(
  "president_nameTrump" = "Trump vs. Harris",
  "president_namecontrol" = "Control vs. Harris",
  "confidence_nato_num_01" = "Confidence in NATO Allies",
  "ally_nameoutgroup" = "2nd Vignette Treatment",      
  "pre_treat_spend_num" = "Pre-Treatment Opinion on Defense Spending"
)

#Get a regression table
modelsummary(mod_list_EU, output = "latex", coef_map = coef_labels_EU, gof_omit = "BIC|AIC|Log.Lik.", stars = TRUE)
modelsummary(mod_list_EU_c, output = "latex", coef_map = coef_labels_EU, gof_omit = "BIC|AIC|Log.Lik.", stars = TRUE)
```

Mediation Diagrams of the causal relationship between IV and Autonomy preference. 
```{r}
# Specify All 3 Models
m_model <- lm(confidence_nato_num_01 ~ president_name, data = data_consenting_completes)
y_total <- lm(pref_Autonomy_num_01 ~ president_name, data = data_consenting_completes)
y_direct <- lm(pref_Autonomy_num_01 ~ president_name + confidence_nato_num_01, data = data_consenting_completes)

# Extract path coefficients by term name
a_path <- tidy(m_model) %>% filter(term == "president_nameTrump")    # path a coefficient
b_path <- tidy(y_direct) %>% filter(term == "confidence_nato_num_01") # path b coefficient
c_prime_path <- tidy(y_direct) %>% filter(term == "president_nameTrump")  # path c' coefficient
c_path <- tidy(y_total) %>% filter(term == "president_nameTrump")    # total effect c coefficient

# Extract Coefficients and P Values
coef_a <- round(a_path$estimate, 3)
coef_b <- round(b_path$estimate, 3)
coef_c_prime <- round(c_prime_path$estimate, 3)
coef_c <- round(c_path$estimate, 3)

annotations <- tibble(
  path = c("a", "b", "c'"),
  x = c(2, 4, 3),
  y = c(2.7, 2.7, 0.7),
  estimate = c(coef_a, coef_b, coef_c_prime),
  p_value = c(a_path$p.value, b_path$p.value, c_prime_path$p.value)
)

# Define nodal positions for diagram
nodes <- data.frame(
  label = c("Trump \nvs. Control", "Confidence\nin NATO Allies", "Preference\nfor Autonomy"),
  x = c(1, 3, 5),
  y = c(1, 3, 1)
)

#Add halfway points for arrows
annotations <- annotations %>%
  mutate(
    x_mid = case_when(
      path == "a" ~ (1.6 + 2.4) / 2,
      path == "b" ~ (3.6 + 4.4) / 2,
      path == "c'" ~ (1.6 + 4.4) / 2,
      TRUE ~ x  
    ),
    y_mid = case_when(
      path == "a" ~ (1.6 + 2.4) / 2,
      path == "b" ~ (2.4 + 1.6) / 2,
      path == "c'" ~ 1,
      TRUE ~ y
    )
  )

# Make the mediation diagram
mediation_Y4 <- ggplot() +
  # Draw node boxes
  geom_rect(aes(xmin=x-0.6, xmax=x+0.6, ymin=y-0.6, ymax=y+0.6), fill="grey95", color="grey50", data=nodes) +
  geom_text(aes(x=x, y=y, label=label), data=nodes, size=4, fontface="bold") +
  # Arrows
  geom_segment(aes(x=1.6, y=1.6, xend=2.4, yend=2.4), arrow=arrow(length=unit(0.2,"cm")), size=1, color="grey20") + # X to M
  geom_segment(aes(x=3.6, y=2.4, xend=4.4, yend=1.6), arrow=arrow(length=unit(0.2,"cm")), size=1, color="grey20") + # M to Y
  geom_segment(aes(x=1.6, y=1, xend=4.4, yend=1), arrow=arrow(length=unit(0.2,"cm")), size=1, color="grey20") +     # X to Y
  #P values
  geom_label(data = annotations,
           aes(x = x_mid, y = y_mid,
               label = paste0(path, " = ", estimate, " ", 
                              case_when(
                                p_value < 0.001 ~ "***",
                                p_value < 0.01 ~ "**",
                                p_value < 0.05 ~ "*",
                                TRUE ~ ""
                              ))),
           fill = "white",     
           color = "black",
           size = 5,
           label.padding = unit(0.15, "lines"),
           label.r = unit(0.2, "lines")) +
  labs(caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05")+
  theme_void()
print(mediation_Y4)

# Make the diagram for X --> Y without M 
nodes_c <- data.frame(
  label = c("Trump \nvs. Control", "Preference\nfor Autonomy"),
  x = c(1, 5),
  y = c(1, 1)
)
# Calculate midpoint for arrow between the two nodes in nodes_c
x_mid <- mean(c(nodes_c$x[1], nodes_c$x[2]))  # midpoint x between 1 and 5 = 3
y_mid <- mean(c(nodes_c$y[1], nodes_c$y[2]))  # midpoint y between 1 and 1 = 1

# Use midpoint coordinates for c_path annotation
c_annotations <- tibble(
  path = "c",
  x = x_mid,
  y = y_mid + 0.3,
  estimate = coef_c,
  p_value = c_path$p.value
)

X_on_Y4 <- ggplot() +
  geom_rect(aes(xmin = x - 0.6, xmax = x + 0.6, ymin = y - 0.6, ymax = y + 0.6),
            fill = "grey95", color = "grey50", data = nodes_c) +
  geom_text(aes(x = x, y = y, label = label), data = nodes_c, size = 4, fontface = "bold") +
  geom_segment(aes(x = 1.6, y = 1, xend = 4.4, yend = 1),
               arrow = arrow(length = unit(0.03, "npc")), size = 1.3, color = "grey20") +
  geom_label(data = c_annotations,
            aes(x = x_mid, y = y_mid,
                label = paste0(path, " = ", estimate, " ",
                               case_when(
                                   p_value < 0.001 ~ "***",
                                   p_value < 0.01 ~ "**",
                                   p_value < 0.05 ~ "*",
                                   p_value < 0.10 ~ "!",
                                   TRUE ~ ""
                               ))),
            color = "black",
            size = 5) +
  theme_void()
print(X_on_Y4)

library(patchwork)
Y4_diagram <- X_on_Y4 / mediation_Y4
print(Y4_diagram)
ggsave("Autonomy_diagram.png", plot = Y4_diagram, width=6.5, height=4)
```

For the Appendix: Look at how confidence in NATO allies affects support for EU defense cooperation. Is there a tradeoff?
```{r}
# Specify All 3 Models
m_model <- lm(confidence_nato_num_01 ~ president_name, data = data_consenting_completes)
y_total <- lm(pref_EU_num_01 ~ president_name, data = data_consenting_completes)
y_direct <- lm(pref_EU_num_01 ~ president_name + confidence_nato_num_01, data = data_consenting_completes)

# Extract path coefficients by term name
a_path <- tidy(m_model) %>% filter(term == "president_nameTrump")    # path a coefficient
b_path <- tidy(y_direct) %>% filter(term == "confidence_nato_num_01") # path b coefficient
c_prime_path <- tidy(y_direct) %>% filter(term == "president_nameTrump")  # path c' coefficient
c_path <- tidy(y_total) %>% filter(term == "president_nameTrump")    # total effect c coefficient

# Extract Coefficients and P Values
coef_a <- round(a_path$estimate, 3)
coef_b <- round(b_path$estimate, 3)
coef_c_prime <- round(c_prime_path$estimate, 3)
coef_c <- round(c_path$estimate, 3)

annotations <- tibble(
  path = c("a", "b", "c'"),
  x = c(2, 4, 3),
  y = c(2.7, 2.7, 0.7),
  estimate = c(coef_a, coef_b, coef_c_prime),
  p_value = c(a_path$p.value, b_path$p.value, c_prime_path$p.value)
)

# Define nodal positions for diagram
nodes <- data.frame(
  label = c("Trump \nvs. Control", "Confidence\nin NATO Allies", "Preference\nfor EU"),
  x = c(1, 3, 5),
  y = c(1, 3, 1)
)

#Add halfway points for arrows
annotations <- annotations %>%
  mutate(
    x_mid = case_when(
      path == "a" ~ (1.6 + 2.4) / 2,
      path == "b" ~ (3.6 + 4.4) / 2,
      path == "c'" ~ (1.6 + 4.4) / 2,
      TRUE ~ x  
    ),
    y_mid = case_when(
      path == "a" ~ (1.6 + 2.4) / 2,
      path == "b" ~ (2.4 + 1.6) / 2,
      path == "c'" ~ 1,
      TRUE ~ y
    )
  )

# Make the mediation diagram
mediation_Y3 <- ggplot() +
  # Draw node boxes
  geom_rect(aes(xmin=x-0.6, xmax=x+0.6, ymin=y-0.6, ymax=y+0.6), fill="grey95", color="grey50", data=nodes) +
  geom_text(aes(x=x, y=y, label=label), data=nodes, size=4, fontface="bold") +
  # Arrows
  geom_segment(aes(x=1.6, y=1.6, xend=2.4, yend=2.4), arrow=arrow(length=unit(0.2,"cm")), size=1, color="grey20") + # X to M
  geom_segment(aes(x=3.6, y=2.4, xend=4.4, yend=1.6), arrow=arrow(length=unit(0.2,"cm")), size=1, color="grey20") + # M to Y
  geom_segment(aes(x=1.6, y=1, xend=4.4, yend=1), arrow=arrow(length=unit(0.2,"cm")), size=1, color="grey20") +     # X to Y
  #P values
  geom_label(data = annotations,
           aes(x = x_mid, y = y_mid,
               label = paste0(path, " = ", estimate, " ", 
                              case_when(
                                p_value < 0.001 ~ "***",
                                p_value < 0.01 ~ "**",
                                p_value < 0.05 ~ "*",
                                TRUE ~ ""
                              ))),
           fill = "white",     
           color = "black",
           size = 5,
           label.padding = unit(0.15, "lines"),
           label.r = unit(0.2, "lines")) +
  labs(caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05")+
  theme_void()
print(mediation_Y3)

# Make the diagram for X --> Y without M 
nodes_c <- data.frame(
  label = c("Trump \nvs. Control", "Preference\nfor EU"),
  x = c(1, 5),
  y = c(1, 1)
)
# Calculate midpoint for arrow between the two nodes in nodes_c
x_mid <- mean(c(nodes_c$x[1], nodes_c$x[2]))  # midpoint x between 1 and 5 = 3
y_mid <- mean(c(nodes_c$y[1], nodes_c$y[2]))  # midpoint y between 1 and 1 = 1

# Use midpoint coordinates for c_path annotation
c_annotations <- tibble(
  path = "c",
  x = x_mid,
  y = y_mid + 0.3,
  estimate = coef_c,
  p_value = c_path$p.value
)

X_on_Y3 <- ggplot() +
  geom_rect(aes(xmin = x - 0.6, xmax = x + 0.6, ymin = y - 0.6, ymax = y + 0.6),
            fill = "grey95", color = "grey50", data = nodes_c) +
  geom_text(aes(x = x, y = y, label = label), data = nodes_c, size = 4, fontface = "bold") +
  geom_segment(aes(x = 1.6, y = 1, xend = 4.4, yend = 1),
               arrow = arrow(length = unit(0.03, "npc")), size = 1.3, color = "grey20") +
  geom_label(data = c_annotations,
            aes(x = x_mid, y = y_mid,
                label = paste0(path, " = ", estimate, " ",
                               case_when(
                                   p_value < 0.001 ~ "***",
                                   p_value < 0.01 ~ "**",
                                   p_value < 0.05 ~ "*",
                                   p_value < 0.10 ~ "!",
                                   TRUE ~ ""
                               ))),
            color = "black",
            size = 5) +
  theme_void()
print(X_on_Y3)

library(patchwork)
Y3_diagram <- X_on_Y3 / mediation_Y3
print(Y3_diagram)
ggsave("EU_diagram.png", plot = Y3_diagram, width=6.5, height=4)
```

#Robustness Checks with Ordered Logit
```{r}
#First, we re-run our main analyses of H1. We run the models with and without controls. 
#The only difference is that we use the 1-5 version of the DV, since the rescaled 0-1 variable cannot easily be compared to the Logit model. 
hyp1mod <- lm(dv_spend_num_01 ~ president_name +
              pre_treat_spend_num, 
              data = data_Harris_ref)
summary(hyp1mod)
class(data_Harris_ref$dv_spend_num_01)
#Now with controls
hyp1mod_controls <- lm(dv_spend_num_01 ~ president_name + 
                             pre_treat_spend_num +   
                             country + 
                             age_name + 
                             as.factor(gender_num) +
                             education_num + 
                             political_interest_num + 
                             income_num +
                             lr_scale_num +
                             us_opinion_num + 
                             nato_opinion_num +
                             economy_opinion_num + 
                             threat_perception_num
                           ,
                           data = data_Harris_ref)
summary(hyp1mod_controls)

#Now we re-run the exact same models with the ordered logit model instead of OLS
#The DV must be an ordered factor before plugging into ordered logit
# Use original 1-5 variable to create ordered factor
data_Harris_ref$dv_spend_ordered <- factor(data_Harris_ref$dv_spend_num, ordered = TRUE, levels = 1:5)
data_consenting_completes$dv_spend_ordered <- factor(data_consenting_completes$dv_spend_num, ordered = TRUE, levels = 1:5)

#Check class
class(data_Harris_ref$dv_spend_ordered) #should read ordered factor!
class(data_consenting_completes$dv_spend_ordered)

#Proceed with ordered logit main model
hyp1mod_logit <- polr(dv_spend_ordered ~ president_name +  
              pre_treat_spend_num, 
              data = data_Harris_ref, Hess = T)
summary(hyp1mod_logit)

#Now with controls
hyp1mod_controls_logit <- polr(dv_spend_ordered ~ president_name + 
                             pre_treat_spend_num +   
                             country + 
                             age_name + 
                             as.factor(gender_num) +
                             education_num + 
                             political_interest_num + 
                             income_num +
                             lr_scale_num +
                             us_opinion_num + 
                             nato_opinion_num +
                             economy_opinion_num + 
                             threat_perception_num
                           ,
                           data = data_Harris_ref, Hess = T)
summary(hyp1mod_controls_logit)

#Compare all 4 models in one table
robustness_h1_list <- list(
  "OLS Main Model" = hyp1mod, 
  "OLS With Controls" = hyp1mod_controls, 
  "Ordered Logit Main Model" = hyp1mod_logit,
  "Ordered Logit With Controls" = hyp1mod_controls_logit
)

coef_list <- c(
  "president_nameTrump" = "Trump vs. Harris",
  "president_namecontrol" = "Control vs. Harris",
  "pre_treat_spend_num" = "Pre-Treatment Opinion on Defense Spending",
  "countrySE" = "Sweden",
  "countryUK" = "UK",
  "age_name" = "Age",
  "as.factor(gender_num)2" = "Gender: Female", 
  "as.factor(gender_num)3" = "Gender: Other",
  "education_num" = "Education Level",
  "political_interest_num" = "Political Interest",
  "income_num" = "Income Level",
  "lr_scale_num" = "Ideology on a L-R Scale",
  "us_opinion_num" = "Support for Close Relations with the US",
  "nato_opinion_num" = "Favorability of NATO Membership",
  "economy_opinion_num" = "Perceived State of the Economy",
  "threat_perception_num" = "Perceived Threat Posed by Russia")
  
modelsummary(robustness_h1_list, output = "latex", 
             gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = TRUE)      
#Models are robust in terms of the direction, significance, and substantively consistent effect patterns 
#We don't expect the size of estimates to be consistent due to different modeling assumptions
#but we do see effect size differences that are roughly equivalent to a factor of 10, which makes sense given the Logistic assumption
```

Now we can plot the marginal means results for comparability, across both models. 
```{r}
#Re-specify OLS and ordered logit main model
hyp1mod_OLS <- lm(dv_spend_num ~ president_name + #We use the 1-5 numeric DV so marginal means are comparable on the same scale
              pre_treat_spend_num, 
              data = data_Harris_ref)

hyp1mod_logit <- polr(dv_spend_ordered ~ president_name +  
              pre_treat_spend_num, 
              data = data_Harris_ref, Hess = T)

robustness_means <- list(hyp1mod_OLS, hyp1mod_logit)
predictor_var <- "president_name"
model_names <- c("OLS", "Ordered Logit")

# Compute marginal predicted mean (E[Y]) for each level of predictor_var
get_marginal_means_point <- function(mod, predictor_var) {
  mf <- model.frame(mod)
  if (!predictor_var %in% names(mf)) {
    stop("predictor_var '", predictor_var, "' not found in the model frame.")
  }
  orig_pred <- mf[[predictor_var]]
  levels_pred <- if (is.factor(orig_pred)) levels(orig_pred) else sort(unique(orig_pred))

  map_dfr(levels_pred, function(lvl) {
    newdata <- mf
    if (is.factor(orig_pred)) {
      newdata[[predictor_var]] <- factor(lvl, levels = levels(orig_pred))
    } else {
      newdata[[predictor_var]] <- lvl
    }

    
    prob_mat <- tryCatch(
      predict(mod, newdata = newdata, type = "prob"),
      error = function(e) NULL,
      warning = function(w) NULL
    )

    if (!is.null(prob_mat)) {
      probs <- as.data.frame(prob_mat)
      cn <- colnames(probs)
      vals_num <- suppressWarnings(as.numeric(cn))
      if (any(is.na(vals_num))) {
        vals_num <- seq_along(cn)
      }

      expected_per_row <- as.numeric(as.matrix(probs) %*% vals_num) 
      # converts probability of being in category "1" on the Likert to numeric expected value for each observation
    } else {
      
      preds <- tryCatch(
        predict(mod, newdata = newdata, type = "response"),
        error = function(e) stop("Can't get predictions for this model class. Consider using a predict method that returns 'prob' or 'response'.")
      )
      expected_per_row <- as.numeric(preds)
    }

    tibble(!!predictor_var := lvl, mean_pred = mean(expected_per_row))
  })
}

# Apply to list of models, keep model name
mm_list <- map2(robustness_means, model_names, function(mod, nm) {
  get_marginal_means_point(mod, predictor_var) %>% mutate(model = nm)
})

mm_df <- bind_rows(mm_list) %>% 
  mutate(president_name = case_when(
  president_name == "control" ~ "Control", 
  TRUE ~ president_name))

# Order levels of predictor by the average predicted mean 
order_levels <- mm_df %>%
  group_by(.data[[predictor_var]]) %>%
  summarize(avg = mean(mean_pred), .groups = "drop") %>%
  arrange(avg) %>%
  pull(.data[[predictor_var]])

mm_df[[predictor_var]] <- factor(mm_df[[predictor_var]], levels = order_levels)

# Plot
h1_robustness_plot <- ggplot(mm_df, aes(x = .data[[predictor_var]], y = mean_pred, group = model)) +
  geom_point(size = 3) +
  geom_line(aes(linetype = model), linewidth = 0.9) +
  labs(
    title = "Predicted Marginal Means",
    x = "President Treatment",
    y = "Predicted Willingness to Spend (1–5 Scale)",
    linetype = "Model"
  ) +
  theme_bw() +
  theme(legend.position = "top",
        legend.title = element_text(size = 12, face = "plain"),
        title = element_text(size = 14, face = "bold"),
        axis.title.x = element_text(size = 12, face = "bold"),  
        axis.title.y = element_text(size = 12, face = "bold"),
        strip.text = element_text(size = 12),
        axis.text.x = element_text(size = 10)) 
print(h1_robustness_plot)
ggsave("h1_robustness_plot.png", plot = h1_robustness_plot, width = 5, height = 5)
```

We repeat the same procedure for the main H2 models (one for each of the institutional DVs).
```{r}
#Recall the original models from H2:
print(hyp2_model1_list)
#Extract the models of interest
hyp2_OLS_NATO <- hyp2_model1_list[[1]]
hyp2_OLS_EU <- hyp2_model1_list[[2]]
hyp2_OLS_Autonomy <- hyp2_model1_list[[3]]

#Before fitting ordered logit, create ordered factor DVs for each model
data_Harris_ref$pref_NATO_ordered <- factor(data_Harris_ref$pref_NATO_num, ordered = TRUE, levels = 1:5)
data_Harris_ref$pref_EU_ordered <- factor(data_Harris_ref$pref_EU_num, ordered = TRUE, levels = 1:5)
data_Harris_ref$pref_Autonomy_ordered <- factor(data_Harris_ref$pref_Autonomy_num, ordered = TRUE, levels = 1:5)

#Proceed with ordered logit models
hyp2_logit_NATO <- polr(pref_NATO_ordered ~ president_name, 
              data = data_Harris_ref, Hess = T)
summary(hyp2_logit_NATO)

hyp2_logit_EU <- polr(pref_EU_ordered ~ president_name, 
              data = data_Harris_ref, Hess = T)
summary(hyp2_logit_EU)

hyp2_logit_Autonomy <- polr(pref_Autonomy_ordered ~ president_name, 
              data = data_Harris_ref, Hess = T)
summary(hyp2_logit_Autonomy)

#Compare all 6 models in one table
robustness_h2_list <- list(
  "OLS NATO Model" = hyp2_OLS_NATO, 
  "Ordered Logit NATO Model" = hyp2_logit_NATO, 
  "OLS EU Model" = hyp2_OLS_EU, 
  "Ordered Logit EU Model" = hyp2_logit_EU, 
  "OLS Autonomy Model" = hyp2_OLS_Autonomy, 
  "Ordered Logit Autonomy Model" = hyp2_logit_Autonomy)

modelsummary(robustness_h2_list, output = "latex", 
             gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = TRUE)      
```

Now plot for comparability:
```{r}
#Respecify the models of interest
hyp2_OLS_NATO <- lm(pref_NATO_num ~ president_name, #Again the 1-5 numeric DV for comparison
              data = data_Harris_ref)

hyp2_OLS_EU <- lm(pref_EU_num ~ president_name, 
              data = data_Harris_ref)

hyp2_OLS_Autonomy <- lm(pref_Autonomy_num ~ president_name, 
              data = data_Harris_ref)

#New list updated
robustness_h2_list <- list(hyp2_OLS_NATO, hyp2_logit_NATO, hyp2_OLS_EU, hyp2_logit_EU, hyp2_OLS_Autonomy, hyp2_logit_Autonomy)

predictor_var <- "president_name"

# Assign names for plotting: DV and model type
dv_names <- c("NATO", "EU", "Autonomy")
model_types <- c("OLS", "Ordered Logit")

# Apply existing function and attach dv and model_type 
mm_list <- map2(seq_along(robustness_h2_list), robustness_h2_list, function(i, mod) {
  dv_index <- ceiling(i / 2)               # 2 models per DV
  model_type <- model_types[(i - 1) %% 2 + 1]
  dv_name <- dv_names[dv_index]
  
  get_marginal_means_point(mod, predictor_var) %>% #same function as previously
    mutate(model_type = model_type, dv = dv_name)
})

mm_df <- bind_rows(mm_list) %>%
  mutate(president_name = case_when(
    president_name == "control" ~ "Control",
    TRUE ~ president_name
  ))

# Order predictor levels by average predicted mean 
order_levels <- mm_df %>%
  group_by(.data[[predictor_var]]) %>%
  summarize(avg = mean(mean_pred), .groups = "drop") %>%
  arrange(avg) %>%
  pull(.data[[predictor_var]])

mm_df[[predictor_var]] <- factor(mm_df[[predictor_var]], levels = order_levels)

# Faceted plot
h2_robustness_plot <- ggplot(mm_df, aes(x = .data[[predictor_var]], y = mean_pred, shape = dv, group = interaction(dv, model_type))) +
  geom_point(size = 3) +
  geom_line(aes(linetype = model_type), linewidth = 0.9) +
  labs(
    title = "Predicted Marginal Means ",
    x = "President Treatment",
    y = "Predicted Institutional Preference (1–5 Scale)",
    linetype = "Model",
    shape = "Institution"
  ) +
  theme_bw() +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12, face = "plain"),
    title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),  
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(size = 10)
  )
print(h2_robustness_plot)
ggsave("h2_robustness_plot.png", plot = h2_robustness_plot, height = 5, width = 6)
```

#Robustness Checks with Treatment Compliers
We rerun H1 and H2 only for the subset of respondents who were strongly convinced by the treatment. 
```{r}
## Filter Data 
#Coalesce Election Expectations
data_compliance_check <- data_Harris_ref %>%
  mutate(
    election_expectation_name = case_when(
      election_expectation_name %in% c("I don't know", "Ich weiß es nicht", "Vet inte") ~ "Don't Know",
      TRUE ~ election_expectation_name  # Keep other values the same
    ))
#Check Grouping
data_compliance_check %>%
  group_by(president_name) %>%
  count(election_expectation_name) 

#Hard Compliers Harris: 483
#Hard Compliers Trump: 532

#Filter
compliers_data <- data_compliance_check %>%
  filter(
    (president_name == "Harris" & election_expectation_name == "Kamala Harris") |
      (president_name == "Trump" & election_expectation_name == "Donald Trump")
  ) %>%
  filter(election_expectation_name != "Don't Know")

#Cross-Check
compliers_data %>%
  group_by(president_name, country, gender_num) %>%
  count(election_expectation_name) 

compliers_data %>% 
  count(gender_num, country)

## H1 ---- Effect of HW on Spending (Trump vs Harris)
h1_compliers <- lm(dv_spend_num_01 ~ president_name +
                     pre_treat_spend_num,
                   data = compliers_data) #Only using Compliers
summary(h1_compliers)

#Recall main model object
summary(hyp1mod)

compare_compliers <- list(
     "Main Model" = hyp1mod, 
     "Only Compliers Model" = h1_compliers)

modelsummary(compare_compliers, output = "latex", 
             gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = TRUE)   
#Robust results, just a bit less significant for the subsample with president treatment and compliers only

## H2 ---- Effect of HW on Spending (Trump vs Harris)
h2_compliers_nato <- lm(pref_NATO_num_01 ~ president_name,
                   data = compliers_data) #Only using Compliers

h2_compliers_eu <- lm(pref_EU_num_01 ~ president_name,
                   data = compliers_data) #Only using Compliers

h2_compliers_autonomy <- lm(pref_Autonomy_num_01 ~ president_name,
                   data = compliers_data) #Only using Compliers

#Recall main model objects
#Extract the models of interest
hyp2_model1_list

hyp2_OLS_NATO <- hyp2_model1_list[[1]]
hyp2_OLS_EU <- hyp2_model1_list[[2]]
hyp2_OLS_Autonomy <- hyp2_model1_list[[3]]

compare_h2_compliers <- list(
  "Main - NATO" = hyp2_OLS_NATO,
  "Compliers - NATO" = h2_compliers_nato,
  "Main - EU" = hyp2_OLS_EU,
  "Compliers - EU" = h2_compliers_eu,
  "Main - Autonomy" = hyp2_OLS_Autonomy,
  "Compliers - Autonomy" = h2_compliers_autonomy
)

modelsummary(compare_h2_compliers, output = "latex", 
             gof_omit = "BIC|AIC|Log.Lik.", coef_map = coef_list, stars = TRUE)  
#Across the board, effect sizes are larger and more significant for compliers...direction is always consistent with our main findings
```

# Ideology as a Moderator
```{r}
#H1 Interaction Effects: LR
table(data_Harris_ref$lr_scale_num)
h1_model_LR <- lm(dv_spend_num_01 ~ president_name * lr_scale_num * country +
                    pre_treat_spend_num,
                  data = data_Harris_ref) 

h1_marginal_LR <- comparisons(
  h1_model_LR,
  variables = "president_name", 
  by = c("lr_scale_num", "country") 
)
h1_marginal_LR_df <- as.data.frame(h1_marginal_LR)
h1_marginal_LR_df <- h1_marginal_LR_df %>%
  mutate(
    ci95_high = estimate + 1.96 * std.error,
    ci95_low = estimate - 1.96 * std.error,
    ci90_high = estimate + 1.64 * std.error,
    ci90_low = estimate - 1.64 * std.error,
    country = case_when(
      country == "DE" ~ "Germany",
      country == "SE" ~ "Sweden",
      TRUE ~ country)) %>%
   filter(!contrast == "mean(control) - mean(Harris)")

h1_marginal_LR_df$contrast <- factor(h1_marginal_LR_df$contrast,
                                     levels = "mean(Trump) - mean(Harris)")
  
# Plot
h1_marginal_LR_plot <- ggplot(h1_marginal_LR_df, aes(x = lr_scale_num, y = estimate, color = contrast)) +
  geom_point(position = position_dodge(width = 0.5), size = 2) +
  geom_linerange(aes(ymin = ci95_low, ymax = ci95_high), position = position_dodge(width = 0.5), linewidth = 0.5) +
  geom_linerange(aes(ymin = ci90_low, ymax = ci90_high), position = position_dodge(width = 0.5), linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  facet_wrap(~country) +
  labs(
    x = "Ideology on a L-R Scale",
    y = "Estimated Marginal Effect on Willingness to Spend",
    color = "President Treatment",
    caption = "Significance levels: *** p < 0.001, ** p < 0.01, * p < 0.05"
  ) +
  scale_color_manual(
    values = c("mean(Trump) - mean(Harris)" = "purple4"),
    labels = c("mean(Trump) - mean(Harris)" = "Trump vs. Harris")
  ) +
  geom_text(aes(
    x = as.numeric(lr_scale_num) + case_when(
      contrast == "mean(Trump) - mean(Harris)" ~ 0.45,  
      TRUE ~ 0
    ),
    y = estimate,  
    label = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01  ~ "**",
      p.value < 0.05  ~ "*",
      TRUE            ~ ""
    )),
    size = 3, 
    color = "black"
  ) +
  theme_bw() +
 theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "top")
ggsave("h1_marginal_LR.png", plot = h1_marginal_LR_plot, width = 10, height = 5)

#Linear Version
h1_LR_plot_linear <- ggplot(h1_marginal_LR_df, aes(x = lr_scale_num, y = estimate, group = contrast, fill = contrast)) +
  geom_ribbon(aes(ymin = ci95_low, ymax = ci95_high), alpha = 0.2, color = NA) +  
  geom_ribbon(aes(ymin = ci90_low, ymax = ci90_high), alpha = 0.2, color = NA) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  facet_wrap(~country) +
  labs(
    x = "Ideology on a L-R Scale",
    y = "Estimated Marginal Effect on Willingness to Spend",
    color = "President Treatment",
    fill = "President Treatment",
  ) +
  scale_fill_manual(values = "purple4", labels = "Trump vs. Harris") +
  theme_bw() +
  theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "top")

###### Common Support LR
hist_LR <- data_consenting_completes %>%
  filter(!president_name == "control") %>% 
  count(country, president_name, lr_scale_num) %>% 
  mutate(country = case_when(
    country == "DE" ~ "Germany",
    country == "SE" ~ "Sweden",
    TRUE ~ country)) 
view(hist_LR)

LR_commonsupport <- hist_LR %>% 
  ggplot(aes(x = lr_scale_num, y = n, fill = president_name)) +
  geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.7) +
  facet_wrap(~country) +
  labs(
    x = "Ideology on a L-R Scale, (1-11)",
    y = "Number of Observations",
    fill = "President Treatment"
  ) +
  scale_fill_manual(values = c("Harris" = "steelblue", 
                               "Trump" = "firebrick"),
                    labels = c("Harris" = "Harris Condition", 
                               "Trump" = "Trump Condition")
  ) +
  theme_bw() +
  theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "top")
print(LR_commonsupport)
h1_LR_interaction <- h1_LR_plot_linear / LR_commonsupport + plot_layout(heights = c(2.5, 1.5))
ggsave("h1_marginal_LR.png", plot = h1_LR_interaction, width = 7, height = 8.5)
```

#Manipulation Check
How effective was the treatment at changing election expectations, given the 50-50 odds?
```{r}
#Manipulation Check: Effect of President Treatment on Election Result
##Multinomial logistic regression because DV is nominal with more than 2 categories
##Coalesce Don't Knows into one category for regression 
data_manipulation_check <- data_consenting_numeric %>%
  mutate(
    # for the election_expectation variable
    election_expectation_name = case_when(
      election_expectation_name %in% c("I don't know", "Ich weiß es nicht", "Vet inte") ~ "Don't Know",
      TRUE ~ election_expectation_name  # Keep other values the same
    ))
#Fit the multinomial model
multinom_manipulation_model <- multinom(election_expectation_name ~ president_name + country,
                               data = data_manipulation_check)
summary(multinom_manipulation_model)

#Run predicted probabilities for each election expectation category
head(pp <- fitted(multinom_manipulation_model)) 
pp <- as.data.frame(pp)      
pp_results <- cbind(
  president_name = data_manipulation_check$president_name,
  country = data_manipulation_check$country,  # Reintroduce country
  pp
)
View(pp_results)

#Summarize Across Country
summary_pp <- pp_results %>% 
  group_by(president_name, country) %>%  
  summarise(across(c("Don't Know", "Donald Trump", "Kamala Harris"), mean, .names = "mean_{.col}"))
print(summary_pp)

#Reshape the data to a long format 
summary_pp_long <- summary_pp %>%
  pivot_longer(
    cols = starts_with("mean_"),               
    names_to = "election_expectation_name",    
    values_to = "predicted_probability"        
  ) %>%
  mutate(election_expectation_name = gsub("mean_", "", election_expectation_name)) %>% 
  mutate(president_name = case_when(
      president_name == "control" ~ "Control",
      TRUE ~ president_name
    )) %>% 
  mutate(country = case_when(
    country == "DE" ~ "Germany",
    country == "SE" ~ "Sweden",
    TRUE ~ country))

ggplot(summary_pp_long, aes(x = president_name, y = predicted_probability, fill = election_expectation_name)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = scales::percent(predicted_probability, accuracy = 1)),  
    position = position_dodge(width = 0.9),  
    vjust = -0.5,                            
    color = "black",                         
    size = 2.5,                               
    fontface = "bold"                        
  ) +
  labs(
    x = "Treatment Group",
    y = "Predicted Probability",
    fill = "Reported Election Expectation"
  ) +
  scale_fill_manual(values = c("Kamala Harris" = "steelblue1", "Donald Trump" = "firebrick1", 
                               "Don't Know" = "grey")) +
  facet_wrap(~country) +
  theme_bw() +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 0.65)) +  # Set y-axis max to 0.7
   theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "top")
ggsave("manipulation_check_electionexpectation.png", plot = last_plot(), width = 8, height = 5)


#Bar Plot without Predicted Probabilities
# Summarize and compute percentages
library(scales)
election_expectation_data <- data_manipulation_check %>%
  group_by(country, president_name, election_expectation_name) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(country, president_name) %>%
  mutate(percent = count / sum(count)) %>%
  ungroup() %>%
  mutate(
    president_name = case_when(
      president_name == "control" ~ "Control",
      TRUE ~ president_name
    ),
    country = case_when(
      country == "DE" ~ "Germany",
      country == "SE" ~ "Sweden",
      TRUE ~ country
    ))

# Bar plot: percentage of total responses
ggplot(election_expectation_data, aes(x = president_name, y = percent, fill = election_expectation_name)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = percent_format()) +
  facet_wrap(~country) +
  geom_text(
    aes(label = percent(percent, accuracy = 1)),
    position = position_dodge(width = 0.9),
    vjust = -0.3,
    size = 3
  ) +
  scale_fill_manual(values = c("Kamala Harris" = "steelblue1", "Donald Trump" = "firebrick1", 
                               "Don't Know" = "grey")) +
  labs(
    x = "President Name",
    y = "Percent of Responses (within each country)",
    fill = "Reported Election Expectation",
  ) +
  theme_bw() +
 theme(axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "top")
```
